STATS506 - Assignment 5

Author

Prathibha Muthukumara Prasanna

Problem 1 - OOP Programming

Problem 1a

Code
#' Wald-style Normal Approximation Confidence Interval (S4 Class)

setClass("waldCI",
         slots = c(
           level = "numeric",
           mean = "numeric",
           se = "numeric",
           lower = "numeric",
           upper = "numeric"
         )
)

I’ve defined a new S4 class called waldCI. It has 5 slots. Each slot stores a numeric value.

Code
 make_wald_ci <- function(level, mean = NULL, se = NULL, lower = NULL, upper = NULL) {
  
  # Check that level was provided
  if (missing(level)) {
    stop("level is required")
  }
  
  # Quick validation of level
  if (level <= 0 || level >= 1) {
    stop("level must be between 0 and 1 (exclusive)")
  }
  
  # Check which specification is being used
  has_mean_se <- !is.null(mean) && !is.null(se)
  has_bounds <- !is.null(lower) && !is.null(upper)
  
  # Must provide one specification but not both
  if (!has_mean_se && !has_bounds) {
    stop("Must provide either (mean and se) or (lower and upper)")
  }
  
  if (has_mean_se && has_bounds) {
    stop("Cannot provide both (mean, se) and (lower, upper). Choose one specification.")
  }
  
  # Calculate z-score for the given confidence level
  alpha <- 1 - level
  z <- qnorm(1 - alpha / 2)
  
  # Case 1: Mean and SE provided - calculate bounds
  if (has_mean_se) {
    # Check for negative or zero SE
    if (se <= 0) {
      stop("se must be positive")
    }
    
    # Check for infinite or NA values
    if (!is.finite(mean)) {
      stop("mean must be a finite number")
    }
    if (!is.finite(se)) {
      stop("se must be a finite number")
    }
    
    lower <- mean - z * se
    upper <- mean + z * se
  }
  
  # Case 2: Bounds provided - back-calculate mean and SE
  if (has_bounds) {
    # Check for infinite bounds
    if (!is.finite(lower) || !is.finite(upper)) {
      stop("Confidence interval bounds must be finite")
    }
    
    # Check that lower < upper
    if (lower >= upper) {
      stop("lower must be less than upper")
    }
    
    mean <- (lower + upper) / 2
    se <- (upper - lower) / (2 * z)
  }
  
  # Create and return the waldCI object
  new("waldCI",
      level = level,
      mean = mean,
      se = se,
      lower = lower,
      upper = upper)
}

The custom constructor function accepts two ways to create a CI - 1) from the mean and SE and 2) from bounds. It validates inputs before creating the object using new() internally.

Code
setValidity("waldCI", function(object) {
  errors <- character()
  
  # Check that level is a single number between 0 and 1
  if (length(object@level) != 1) {
    errors <- c(errors, "level must be a single numeric value")
  } else if (!is.finite(object@level)) {
    errors <- c(errors, "level must be a finite number")
  } else if (object@level <= 0 || object@level >= 1) {
    errors <- c(errors, "level must be between 0 and 1 (exclusive)")
  }
  
  # Check that mean is finite
  if (length(object@mean) != 1) {
    errors <- c(errors, "mean must be a single numeric value")
  } else if (!is.finite(object@mean)) {
    errors <- c(errors, "mean must be a finite number")
  }
  
  # Check that SE is positive and finite
  if (length(object@se) != 1) {
    errors <- c(errors, "se must be a single numeric value")
  } else if (!is.finite(object@se)) {
    errors <- c(errors, "se must be a finite number")
  } else if (object@se <= 0) {
    errors <- c(errors, "se must be positive")
  }
  
  # Check that bounds are finite
  if (!is.finite(object@lower)) {
    errors <- c(errors, "lower bound must be finite")
  }
  if (!is.finite(object@upper)) {
    errors <- c(errors, "upper bound must be finite")
  }
  
  # Check that lower bound is less than upper bound
  if (is.finite(object@lower) && is.finite(object@upper) && object@lower >= object@upper) {
    errors <- c(errors, "lower bound must be less than upper bound")
  }
  
  # Check that mean is between the bounds
  if (is.finite(object@mean) && is.finite(object@lower) && is.finite(object@upper)) {
    if (object@mean < object@lower || object@mean > object@upper) {
      errors <- c(errors, "mean must be between lower and upper bounds")
    }
  }
  
  # If no errors, return TRUE; otherwise return the errors
  if (length(errors) == 0) TRUE else errors
})
Class "waldCI" [in ".GlobalEnv"]

Slots:
                                              
Name:    level    mean      se   lower   upper
Class: numeric numeric numeric numeric numeric

The validator checks if a waldCI object is valid. It checks 1) confidence level is between 0 and 1, 2) standard error is positive, 3) lower bound < upper bound and 4) mean is between the bounds. If everything is okay, it returns TRUE. If something is wrong, it returns error messages explaining what’s wrong.

Code
setMethod("show", "waldCI", function(object) {
  cat("Wald Confidence Interval\n")
  cat(sprintf("Level:      %.1f%%\n", object@level * 100))
  cat(sprintf("Estimate:   %.4f\n", object@mean))
  cat(sprintf("Std Error:  %.4f\n", object@se))
  cat(sprintf("Interval:   [%.4f, %.4f]\n", object@lower, object@upper))
})

The show method shows confidence level (as percentage), estimate, standard error, and the interval. It displays the confidence interval in a nice, readable format.

Code
setGeneric("lb", function(object) standardGeneric("lb"))
[1] "lb"
Code
setMethod("lb", "waldCI", function(object) {
  object@lower
})

setGeneric("ub", function(object) standardGeneric("ub"))
[1] "ub"
Code
setMethod("ub", "waldCI", function(object) {
  object@upper
})

setGeneric("mean", function(x, ...) standardGeneric("mean"))
[1] "mean"
Code
setMethod("mean", "waldCI", function(x, ...) {
  x@mean
})

setGeneric("sterr", function(object) standardGeneric("sterr"))
[1] "sterr"
Code
setMethod("sterr", "waldCI", function(object) {
  object@se
})

setGeneric("level", function(object) standardGeneric("level"))
[1] "level"
Code
setMethod("level", "waldCI", function(object) {
  object@level
})

Accessor methods get the values from the object.

Code
setGeneric("lb<-", function(object, value) standardGeneric("lb<-"))
[1] "lb<-"
Code
setMethod("lb<-", "waldCI", function(object, value) {
  # Update the lower bound
  object@lower <- value
  
  # Recalculate mean and SE based on new bounds
  z <- qnorm(1 - (1 - object@level) / 2)
  object@mean <- (object@lower + object@upper) / 2
  object@se <- (object@upper - object@lower) / (2 * z)
  
  validObject(object)
  
  return(object)
})

setGeneric("ub<-", function(object, value) standardGeneric("ub<-"))
[1] "ub<-"
Code
setMethod("ub<-", "waldCI", function(object, value) {
  # Update the upper bound
  object@upper <- value
  
  # Recalculate mean and SE based on new bounds
  z <- qnorm(1 - (1 - object@level) / 2)
  object@mean <- (object@lower + object@upper) / 2
  object@se <- (object@upper - object@lower) / (2 * z)
  
  validObject(object)
  
  return(object)
})

setGeneric("mean<-", function(object, value) standardGeneric("mean<-"))
[1] "mean<-"
Code
setMethod("mean<-", "waldCI", function(object, value) {
  # Calculate the shift needed
  shift <- value - object@mean
  
  # Update mean and bounds (shift the entire interval)
  object@mean <- value
  object@lower <- object@lower + shift
  object@upper <- object@upper + shift

  validObject(object)
  
  return(object)
})

setGeneric("sterr<-", function(object, value) standardGeneric("sterr<-"))
[1] "sterr<-"
Code
setMethod("sterr<-", "waldCI", function(object, value) {
  if (value <= 0) {
    stop("Standard error must be positive")
  }
  
  # Update SE
  object@se <- value
  
  # Recalculate bounds based on new SE (keeping mean fixed)
  z <- qnorm(1 - (1 - object@level) / 2)
  object@lower <- object@mean - z * value
  object@upper <- object@mean + z * value
  
  validObject(object)
  
  return(object)
})

setGeneric("level<-", function(object, value) standardGeneric("level<-"))
[1] "level<-"
Code
setMethod("level<-", "waldCI", function(object, value) {
  if (value <= 0 || value >= 1) {
    stop("Confidence level must be between 0 and 1")
  }
  
  # Update level
  object@level <- value
  
  # Recalculate bounds based on new level (keeping mean and SE fixed)
  z <- qnorm(1 - (1 - value) / 2)
  object@lower <- object@mean - z * object@se
  object@upper <- object@mean + z * object@se
  
  validObject(object)
  
  return(object)
})

Setter methods set or change values in the object.

Code
setGeneric("contains", function(object, value) standardGeneric("contains"))
[1] "contains"
Code
setMethod("contains", "waldCI", function(object, value) {
  # Check if value is between lower and upper bounds (inclusive)
  value >= object@lower & value <= object@upper
})

The contains method checks if a given value falls within the confidence interval. It returns TRUE if the value is between (or equal to) the lower and upper bounds. It returns FALSE otherwise.

Code
setGeneric("overlap", function(ci1, ci2) standardGeneric("overlap"))
[1] "overlap"
Code
setMethod("overlap", signature(ci1 = "waldCI", ci2 = "waldCI"), 
          function(ci1, ci2) {
  ci1@lower <= ci2@upper & ci1@upper >= ci2@lower
})

The overlap method checks if two confidence intervals overlap. It returns TRUE if they overlap, FALSE if they don’t.

Code
setGeneric("as.numeric")
[1] "as.numeric"
Code
setMethod("as.numeric", signature(x = "waldCI"), function(x, ...) {
  c(x@lower, x@upper)
})

The as.numeric method converts a waldCI object to a numeric vector. It returns c(lower_bound, upper_bound)

Code
setGeneric("transformCI", function(ci, func) standardGeneric("transformCI"))
[1] "transformCI"
Code
setMethod("transformCI", signature(ci = "waldCI", func = "function"),
          function(ci, func) {
  
  warning("Only monotonic functions make sense for transforming confidence intervals. ",
          "Non-monotonic functions may produce invalid or meaningless results.",
          call. = FALSE)
  
  transformed_mean <- func(ci@mean)
  transformed_lower <- func(ci@lower)
  transformed_upper <- func(ci@upper)
  
  new_lower <- min(transformed_lower, transformed_upper)
  new_upper <- max(transformed_lower, transformed_upper)
  
  make_wald_ci(level = ci@level, 
               lower = new_lower, 
               upper = new_upper)
})

The transformCI method applies a mathematical function to a confidence interval. It transforms the mean, lower bound, and upper bound using the given function. It warns users that only monotonic functions make sense.

Problem 1b

Code
library(methods)

# Create the three CI objects
ci1 <- make_wald_ci(level = 0.95, lower = 17.2, upper = 24.7)
ci2 <- make_wald_ci(level = 0.99, mean = 13, se = 2.5)
ci3 <- make_wald_ci(level = 0.75, lower = 27.43, upper = 39.22)
Code
ci1
Wald Confidence Interval
Level:      95.0%
Estimate:   20.9500
Std Error:  1.9133
Interval:   [17.2000, 24.7000]
Code
ci2
Wald Confidence Interval
Level:      99.0%
Estimate:   13.0000
Std Error:  2.5000
Interval:   [6.5604, 19.4396]
Code
ci3
Wald Confidence Interval
Level:      75.0%
Estimate:   33.3250
Std Error:  5.1245
Interval:   [27.4300, 39.2200]
Code
as.numeric(ci1)
[1] 17.2 24.7
Code
as.numeric(ci2)
[1]  6.560427 19.439573
Code
as.numeric(ci3)
[1] 27.43 39.22
Code
lb(ci2)
[1] 6.560427
Code
ub(ci2)
[1] 19.43957
Code
mean(ci1)
[1] 20.95
Code
sterr(ci3)
[1] 5.12453
Code
level(ci2)
[1] 0.99
Code
lb(ci2) <- 10.5
mean(ci3) <- 34
level(ci3) <- .8
contains(ci1, 17)
[1] FALSE
Code
contains(ci3, 44)
[1] FALSE
Code
overlap(ci1, ci2)
[1] TRUE
Code
eci1 <- transformCI(ci1, sqrt)
Warning: Only monotonic functions make sense for transforming confidence
intervals. Non-monotonic functions may produce invalid or meaningless results.
Code
eci1
Wald Confidence Interval
Level:      95.0%
Estimate:   4.5586
Std Error:  0.2099
Interval:   [4.1473, 4.9699]
Code
mean(transformCI(ci2, log))
Warning: Only monotonic functions make sense for transforming confidence
intervals. Non-monotonic functions may produce invalid or meaningless results.
[1] 2.659343

Problem 1c

Code
cat("Attempting to create CI with negative standard error\n")
Attempting to create CI with negative standard error
Code
tryCatch({
  bad_ci1 <- make_wald_ci(level = 0.95, mean = 10, se = -2)
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: se must be positive 
Code
cat("Attempting to create CI with lb > ub\n")
Attempting to create CI with lb > ub
Code
tryCatch({
  bad_ci2 <- make_wald_ci(level = 0.95, lower = 25, upper = 10)
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: lower must be less than upper 
Code
cat("Attempting to create CI with infinite bounds\n")
Attempting to create CI with infinite bounds
Code
tryCatch({
  bad_ci3 <- make_wald_ci(level = 0.95, lower = -Inf, upper = Inf)
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: Confidence interval bounds must be finite 
Code
cat("Attempting to set negative standard error\n")
Attempting to set negative standard error
Code
tryCatch({
  valid_ci <- make_wald_ci(level = 0.95, mean = 10, se = 2)
  sterr(valid_ci) <- -1.5
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: Standard error must be positive 
Code
cat("Attempting to set lower bound above upper bound\n")
Attempting to set lower bound above upper bound
Code
tryCatch({
  valid_ci <- make_wald_ci(level = 0.95, lower = 5, upper = 15)
  lb(valid_ci) <- 20  # This would make lb > ub
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: invalid class "waldCI" object: 1: se must be positive
invalid class "waldCI" object: 2: lower bound must be less than upper bound
invalid class "waldCI" object: 3: mean must be between lower and upper bounds 
Code
cat("Attempting to set upper bound below lower bound\n")
Attempting to set upper bound below lower bound
Code
tryCatch({
  valid_ci <- make_wald_ci(level = 0.95, lower = 5, upper = 15)
  ub(valid_ci) <- 3  # This would make ub < lb
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: invalid class "waldCI" object: 1: se must be positive
invalid class "waldCI" object: 2: lower bound must be less than upper bound
invalid class "waldCI" object: 3: mean must be between lower and upper bounds 
Code
cat("Attempting to create CI with level outside (0,1)\n")
Attempting to create CI with level outside (0,1)
Code
tryCatch({
  bad_ci7 <- make_wald_ci(level = 1.5, mean = 10, se = 2)
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: level must be between 0 and 1 (exclusive) 
Code
cat("Attempting to set confidence level outside (0,1)\n")
Attempting to set confidence level outside (0,1)
Code
tryCatch({
  valid_ci <- make_wald_ci(level = 0.95, mean = 10, se = 2)
  level(valid_ci) <- 0
}, error = function(e) {
  cat("Error caught:", e$message, "\n\n")
})
Error caught: Confidence level must be between 0 and 1 

The output shows that the waldCI class successfully prevents the creation and modification of invalid confidence intervals through proper validation.

Problem 3 - Plotly

Problem 3a

Code
covid_data <- read.csv("https://raw.githubusercontent.com/nytimes/covid-19-data/refs/heads/master/rolling-averages/us-states.csv")
Code
# Convert date to Date object
covid_data$date <- as.Date(covid_data$date)

# Sum across all states for each date
national_data <- covid_data %>%
  group_by(date) %>%
  summarise(
    total_cases = sum(cases_avg, na.rm = TRUE),
    .groups = 'drop'
  )

# Create the plotly visualization
fig1 <- plot_ly(
  data = national_data,
  x = ~date,
  y = ~total_cases,
  type = 'scatter',
  mode = 'lines',
  line = list(color = '#1f77b4', width = 2),
  hovertemplate = paste(
    '<b>Date:</b> %{x|%Y-%m-%d}<br>',
    '<b>7-Day Avg New Cases:</b> %{y:,.0f}<br>',
    '<extra></extra>'
  )
) %>%
  layout(
    title = list(
      text = "<b>COVID-19 Cases in the United States: Identifying Major Waves</b>",
      font = list(size = 18, family = "Arial")
    ),
    xaxis = list(
      title = "<b>Date</b>",
      titlefont = list(size = 14),
      showgrid = TRUE,
      gridcolor = '#E5E5E5',
      showline = TRUE,
      linecolor = 'black'
    ),
    yaxis = list(
      title = "<b>7-Day Average of New Cases (National Total)</b>",
      titlefont = list(size = 14),
      showgrid = TRUE,
      gridcolor = '#E5E5E5',
      tickformat = ',',
      showline = TRUE,
      linecolor = 'black'
    ),
    hovermode = 'x unified',
    plot_bgcolor = 'white',
    paper_bgcolor = 'white',
    font = list(family = "Arial, sans-serif", size = 12),
    margin = list(l = 80, r = 50, t = 80, b = 80)
  ) %>%
  config(displayModeBar = TRUE, displaylogo = FALSE, 
         modeBarButtonsToRemove = c('lasso2d', 'select2d'))

fig1

Looking at the plot, I can clearly see five big spikes where cases went way up: the first one in spring 2020 (around 31,000 cases per day), a bigger one in summer 2020 (about 67,000 cases), a really large winter 2020-2021 wave (around 250,000 cases), the Delta wave in fall 2021 (about 160,000 cases), and the absolutely massive Omicron wave in January 2022 that shot up to over 800,000 cases per day—which is way higher than anything before it. Besides these five major peaks, the graph also shows two smaller bumps that we can call minor spikes: one in spring 2021 (around 70,000 cases) and another in summer 2022 (around 130,000 cases). After the Omicron wave, cases dropped down and stayed much lower through the rest of 2022 and into 2023, showing that the pandemic went through several distinct waves before settling down to lower levels.

Problem 3b

Code
# Calculate overall rates per state across the entire time period
state_overall_rates <- covid_data %>%
  group_by(state) %>%
  summarise(
    avg_rate_per_100k = mean(cases_avg_per_100k, na.rm = TRUE),
    total_period_rate = sum(cases_avg_per_100k, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  arrange(desc(total_period_rate))

# Identify the state with highest and lowest overall rates
highest_state <- state_overall_rates$state[1]
lowest_state <- state_overall_rates$state[nrow(state_overall_rates)]
cat("State with highest overall rate:", highest_state, "\n")
State with highest overall rate: Rhode Island 
Code
cat("State with lowest overall rate:", lowest_state, "\n")
State with lowest overall rate: Maine 
Code
# Filter data for these two states
comparison_states <- covid_data %>%
  filter(state %in% c(highest_state, lowest_state))

# Create the plotly visualization 
fig2 <- plot_ly() %>%
  add_trace(
    data = comparison_states %>% filter(state == highest_state),
    x = ~date,
    y = ~cases_avg_per_100k,
    type = 'scatter',
    mode = 'lines',
    name = highest_state,
    line = list(color = '#d62728', width = 2.5),
    hovertemplate = paste(
      '<b>', highest_state, '</b><br>',
      'Date: %{x|%Y-%m-%d}<br>',
      'Cases per 100k: %{y:.1f}<br>',
      '<extra></extra>'
    )
  ) %>%
  add_trace(
    data = comparison_states %>% filter(state == lowest_state),
    x = ~date,
    y = ~cases_avg_per_100k,
    type = 'scatter',
    mode = 'lines',
    name = lowest_state,
    line = list(color = '#2ca02c', width = 2.5),
    hovertemplate = paste(
      '<b>', lowest_state, '</b><br>',
      'Date: %{x|%Y-%m-%d}<br>',
      'Cases per 100k: %{y:.1f}<br>',
      '<extra></extra>'
    )
  ) %>%
  layout(
    title = list(
      text = "<b>COVID-19 Trajectories: Highest vs Lowest Rate States</b><br><sub>Comparing per capita case rates over time</sub>",
      font = list(size = 18, family = "Arial")
    ),
    xaxis = list(
      title = "<b>Date</b>",
      titlefont = list(size = 14),
      showgrid = TRUE,
      gridcolor = '#E5E5E5',
      showline = TRUE,
      linecolor = 'black'
    ),
    yaxis = list(
      title = "<b>7-Day Average Cases per 100,000 Population</b>",
      titlefont = list(size = 14),
      showgrid = TRUE,
      gridcolor = '#E5E5E5',
      showline = TRUE,
      linecolor = 'black'
    ),
    hovermode = 'x unified',
    plot_bgcolor = 'white',
    paper_bgcolor = 'white',
    legend = list(
      orientation = 'h',
      x = 0.5,
      xanchor = 'center',
      y = -0.15,
      font = list(size = 12)
    ),
    font = list(family = "Arial, sans-serif", size = 12),
    margin = list(l = 80, r = 50, t = 100, b = 100)
  ) %>%
  config(displayModeBar = TRUE, displaylogo = FALSE,
         modeBarButtonsToRemove = c('lasso2d', 'select2d'))

fig2

Looking at the graph, Rhode Island (red line) clearly had much higher COVID-19 rates than Maine (green line) throughout almost the entire pandemic. The biggest difference shows up during the Omicron wave in January 2022, where Rhode Island’s peak shoots up to around 500 cases per 100k people while Maine only reaches about 80 cases per 100k. Even during the earlier winter 2020-2021 wave, Rhode Island peaked at around 120 cases per 100,000 while Maine stayed below 50. Rhode Island’s peaks are also much sharper and more dramatic—the line goes up and down very steeply while Maine’s peaks are gentler and more gradual.

Problem 3c

Code
# Define "substantial" COVID as reaching a threshold of 5 cases per 100k on the 7-day rolling average
threshold <- 5

# Find the first date each state reached the threshold
first_substantial <- covid_data %>%
  filter(cases_avg_per_100k >= threshold) %>%
  group_by(state) %>%
  summarise(first_date = min(date), .groups = 'drop') %>%
  arrange(first_date) %>%
  head(5)

cat("First five states to experience substantial COVID-19:\n")
First five states to experience substantial COVID-19:
Code
print(first_substantial)
# A tibble: 5 × 2
  state         first_date
  <chr>         <date>    
1 New York      2020-03-21
2 New Jersey    2020-03-24
3 Louisiana     2020-03-26
4 Massachusetts 2020-03-27
5 Connecticut   2020-03-28
Code
early_states <- first_substantial$state

# Filter data for these five states, focusing on early pandemic period
early_states_data <- covid_data %>%
  filter(state %in% early_states,
         date >= as.Date("2020-03-01"),
         date <= as.Date("2020-07-01"))

colors <- c('#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd')

# Create the plotly visualization
fig3 <- plot_ly()

# Add a trace for each of the five states
for (i in 1:length(early_states)) {
  state_data <- early_states_data %>% filter(state == early_states[i])
  
  fig3 <- fig3 %>%
    add_trace(
      data = state_data,
      x = ~date,
      y = ~cases_avg_per_100k,
      type = 'scatter',
      mode = 'lines',
      name = early_states[i],
      line = list(color = colors[i], width = 2.5),
      hovertemplate = paste(
        '<b>', early_states[i], '</b><br>',
        'Date: %{x|%Y-%m-%d}<br>',
        'Cases per 100k: %{y:.1f}<br>',
        '<extra></extra>'
      )
    )
}

# Add vertical dashed lines showing when each state crossed the threshold
for (i in 1:nrow(first_substantial)) {
  fig3 <- fig3 %>%
    add_segments(
      x = first_substantial$first_date[i],
      xend = first_substantial$first_date[i],
      y = 0,
      yend = threshold,
      line = list(color = colors[i], dash = 'dash', width = 1.5),
      showlegend = FALSE,
      hovertemplate = paste(
        '<b>', first_substantial$state[i], '</b><br>',
        'First crossed 5 per 100k<br>',
        'Date: ', format(first_substantial$first_date[i], '%Y-%m-%d'),
        '<extra></extra>'
      )
    )
}

# Add horizontal reference line at threshold
fig3 <- fig3 %>%
  add_segments(
    x = as.Date("2020-03-01"),
    xend = as.Date("2020-07-01"),
    y = threshold,
    yend = threshold,
    line = list(color = 'black', dash = 'dot', width = 1),
    showlegend = FALSE,
    name = "Threshold (5 per 100k)"
  )

fig3 <- fig3 %>%
  layout(
    title = list(
      text = "<b>First Five States to Experience Substantial COVID-19 Transmission</b><br><sub>Substantial defined as reaching 5 cases per 100,000 population (7-day average)</sub>",
      font = list(size = 18, family = "Arial")
    ),
    xaxis = list(
      title = "<b>Date</b>",
      titlefont = list(size = 14),
      showgrid = TRUE,
      gridcolor = '#E5E5E5',
      showline = TRUE,
      linecolor = 'black',
      range = c(as.Date("2020-03-01"), as.Date("2020-07-01"))
    ),
    yaxis = list(
      title = "<b>7-Day Average Cases per 100,000 Population</b>",
      titlefont = list(size = 14),
      showgrid = TRUE,
      gridcolor = '#E5E5E5',
      showline = TRUE,
      linecolor = 'black'
    ),
    hovermode = 'closest',
    plot_bgcolor = 'white',
    paper_bgcolor = 'white',
    legend = list(
      orientation = 'v',
      x = 1.02,
      xanchor = 'left',
      y = 1,
      font = list(size = 11)
    ),
    font = list(family = "Arial, sans-serif", size = 12),
    margin = list(l = 80, r = 120, t = 100, b = 80),
    annotations = list(
      list(
        x = 0.5,
        y = -0.12,
        xref = 'paper',
        yref = 'paper',
        text = 'Dashed vertical lines indicate when each state first reached 5 cases per 100k',
        showarrow = FALSE,
        font = list(size = 10, color = 'gray')
      )
    )
  ) %>%
  config(displayModeBar = TRUE, displaylogo = FALSE,
         modeBarButtonsToRemove = c('lasso2d', 'select2d'))

fig3

Looking at the graph, the first five states to experience substantial COVID-19 were New York, New Jersey, Louisiana, Massachusetts, and Connecticut. The dashed vertical lines near the bottom of the graph show when each state crossed this threshold, and they’re all clustered between roughly March 22-29, 2020, meaning these states were hit almost at the same time during the pandemic’s initial arrival in the U.S. New York (blue line) was the very first to cross the threshold, followed closely by the others within about a week.

Attribution of Sources

Problem 1: Used both these resources for better understanding S4 and Claude to fix errors https://adv-r.hadley.nz/s4.html https://caleb-huo.github.io/teaching/2017FALL/lectures/week13_OOP/OOP/IntroToS4-BiostatComputing.html

Problem 2: Used Claude to fix errors, improve syntax and refine the plotting components (such as color choices, layout adjustments)

Github Repository

https://github.com/prathii7/Computational-Methods-and-Tools-in-Statistics